Data of 10 cultivars of rice inoculated with B. glumae or mock inoculated. Discoloration of spikelets were recorded and presented as percentage.
library(tidyverse)
library(readxl)
library(ggplot2)
rice_data <- read_excel("Spray-Data-06.18.20.xlsx",
col_types = c("text", "numeric", "numeric",
"numeric", "numeric","numeric"))
rice_data
## # A tibble: 320 x 6
## Genotype Rep `Mock_30C-22C` `Mock_30C-28C` `Pathogen_30C-2…
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 310111 1 5.88 18.6 86.5
## 2 310111 2 2.48 79.7 82.6
## 3 310111 3 2.9 61.7 100
## 4 310111 4 0 67.2 86.1
## 5 310111 5 1.76 75.4 98.6
## 6 310111 6 0 76.6 93.8
## 7 310111 7 NA NA 85.7
## 8 310111 8 NA NA 100
## 9 310111 9 NA NA 100
## 10 310111 10 NA NA 100
## # … with 310 more rows, and 1 more variable: `Pathogen_30C-28C` <dbl>
We still have to “reshape” the table to make it in longer format coding a column for treatment (Mock vs Inoculated) and temperature profile (30-22 vs 30-28).
rice_data_long <- rice_data %>%
pivot_longer(cols = c("Mock_30C-22C", "Mock_30C-28C",
"Pathogen_30C-22C", "Pathogen_30C-28C"),
names_to = "Inoculation",
values_to = "DiscPerc") %>%
separate(col = Inoculation,
sep = "_",
into = c("Inoculation", "TempProfile")) %>%
unite("Inoc_Temp", Inoculation:TempProfile, remove = FALSE)
#kableExtra::kable(rice_data_long, format = "markdown")
Separating mock from pathogen inoculated:
ggplot(data = rice_data_long, aes(x = Genotype, y = DiscPerc)) +
geom_boxplot(aes(fill = TempProfile)) +
facet_grid(. ~ Inoculation) +
coord_flip()
Looking at genotype effect:
ggplot(data = rice_data_long, aes(x = Inoc_Temp, y = DiscPerc)) +
geom_boxplot(aes(fill = TempProfile)) +
facet_wrap(Genotype ~ ., ncol = 5) +
theme(axis.text.x = element_text(angle = 60, hjust = 1))
Since we are dealing with continous data on four different conditions with need to scale them to estimate their relationships.
library(FactoMineR)
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(RColorBrewer)
#Need to remove NAs
rice_data_NoNAs <- na.omit(rice_data)
#Scaling data
rice_matrix <- rice_data_NoNAs[,3:6]
row.names(rice_matrix) <- paste0(rice_data_NoNAs$Genotype,"_",rice_data_NoNAs$Rep)
## Warning: Setting row names on a tibble is deprecated.
#K-means
set.seed(123)
km.res <- kmeans(scale(rice_matrix), 3, nstart = 10)
fviz_cluster(km.res, data = rice_matrix, labelsize = 8)
#Data Clusters
rice_Kcluster <- cbind(rice_data_NoNAs, km.res$cluster)
rice_Kcluster_data <- rice_Kcluster %>%
pivot_longer(cols = c("Mock_30C-22C", "Mock_30C-28C",
"Pathogen_30C-22C", "Pathogen_30C-28C"),
names_to = "Inoculation",
values_to = "DiscPerc") %>%
separate(col = Inoculation,
sep = "_",
into = c("Inoculation", "TempProfile")) %>%
unite("Inoc_Temp", Inoculation:TempProfile, remove = FALSE) %>%
rename(cluster = `km.res$cluster`)
ggplot(rice_Kcluster_data, aes(x = as.factor(cluster), y = DiscPerc)) +
geom_boxplot(aes(fill = Inoc_Temp))
(Kmeans_tolerant <- tibble(rice_Kcluster_data) %>%
filter(cluster == 1) %>%
group_by(Genotype) %>%
count(Genotype))
## # A tibble: 15 x 2
## # Groups: Genotype [15]
## Genotype n
## <chr> <int>
## 1 301161 28
## 2 310131 24
## 3 310219 16
## 4 310354 20
## 5 310645 24
## 6 310747 8
## 7 310802 28
## 8 310998 8
## 9 311151 28
## 10 311206 32
## 11 311385 32
## 12 311600 20
## 13 311642 12
## 14 311677 8
## 15 311795 4
#Clustering
rice_hc <- hcut(rice_matrix, 3, stand = T)
#Graphical view
p <- fviz_dend(rice_hc, rect = T, cex=0.4)
p
#Data Clusters
rice_cluster <- cbind(rice_data_NoNAs, rice_hc$cluster)
rice_cluster_data <- rice_cluster %>%
pivot_longer(cols = c("Mock_30C-22C", "Mock_30C-28C",
"Pathogen_30C-22C", "Pathogen_30C-28C"),
names_to = "Inoculation",
values_to = "DiscPerc") %>%
separate(col = Inoculation,
sep = "_",
into = c("Inoculation", "TempProfile")) %>%
unite("Inoc_Temp", Inoculation:TempProfile, remove = FALSE) %>%
rename(cluster = `rice_hc$cluster`)
ggplot(rice_cluster_data, aes(x = as.factor(cluster), y = DiscPerc)) +
geom_boxplot(aes(fill = Inoc_Temp))
(cluster_tolerant <- tibble(rice_cluster_data) %>%
filter(cluster == 3) %>%
group_by(Genotype) %>%
count(Genotype))
## # A tibble: 14 x 2
## # Groups: Genotype [14]
## Genotype n
## <chr> <int>
## 1 301161 24
## 2 310131 16
## 3 310219 8
## 4 310354 12
## 5 310645 16
## 6 310747 4
## 7 310802 28
## 8 310998 8
## 9 311151 12
## 10 311206 32
## 11 311385 28
## 12 311600 16
## 13 311642 8
## 14 311677 4
full_join(Kmeans_tolerant, cluster_tolerant, by = "Genotype") %>%
rename(Kmeans = n.x, H_cluster = n.y)
## # A tibble: 15 x 3
## # Groups: Genotype [15]
## Genotype Kmeans H_cluster
## <chr> <int> <int>
## 1 301161 28 24
## 2 310131 24 16
## 3 310219 16 8
## 4 310354 20 12
## 5 310645 24 16
## 6 310747 8 4
## 7 310802 28 28
## 8 310998 8 8
## 9 311151 28 12
## 10 311206 32 32
## 11 311385 32 28
## 12 311600 20 16
## 13 311642 12 8
## 14 311677 8 4
## 15 311795 4 NA
This first one is using FactoMiner,
#Need to remove NAs
rice_data_NoNAs <- na.omit(rice_data)
#Creating Matrix for analysis
rice_matrix <- rice_data_NoNAs[,3:6]
row.names(rice_matrix) <- paste0(rice_data_NoNAs$Genotype,"_",rice_data_NoNAs$Rep)
## Warning: Setting row names on a tibble is deprecated.
#PCA
rice_PCA <- prcomp(rice_matrix, center = T, scale. = T)
#Since we have 20 cultivars we need a palette with 20 colors
colors_n <- length(unique(rice_data_NoNAs$Genotype))
getPalette <- colorRampPalette(brewer.pal(9, "Dark2"))
## Warning in brewer.pal(9, "Dark2"): n too large, allowed maximum for palette Dark2 is 8
## Returning the palette you asked for with that many colors
(PCA_rice <- fviz_pca_biplot(rice_PCA, col.var = "blue",
label = "var", repel = T,
habillage = rice_data_NoNAs$Genotype, addEllipses = TRUE, ellipse.level = 0.95,
ellipse.type ="confidence") +
scale_color_manual(values = getPalette(colors_n)) +
scale_shape_manual(values = c(rep(19, 5), rep(16,5), rep(17,5), rep(18,5))))
## Scale for 'shape' is already present. Adding another scale for 'shape', which
## will replace the existing scale.
plotly::ggplotly(PCA_rice)
## Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomTextRepel() has yet to be implemented in plotly.
## If you'd like to see this geom implemented,
## Please open an issue with your example code at
## https://github.com/ropensci/plotly/issues
library(viridis)
## Loading required package: viridisLite
#PCA
rice_PCA <- prcomp(rice_matrix, center = T, scale. = T)
#PCA
biplot <- ggbiplot::ggbiplot(rice_PCA, obs.scale = 1, var.scale = 1) +
geom_text(aes(label=rice_data_NoNAs$Genotype), size = 2, nudge_y = 0.2, alpha=0.5) +
geom_point(aes(colour=rice_data_NoNAs$`Pathogen_30C-22C`)) +
scale_color_viridis(name = "Mock_30C-22C", option = "D")
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following object is masked from 'package:purrr':
##
## compact
##
## Attaching package: 'scales'
## The following object is masked from 'package:viridis':
##
## viridis_pal
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
plotly::ggplotly(biplot)
rice_wider <- rice_data %>% pivot_wider(names_from = Rep, names_sep = "_",
values_from = c("Mock_30C-22C", "Mock_30C-28C",
"Pathogen_30C-22C", "Pathogen_30C-28C")) %>%
column_to_rownames("Genotype")
rice_wider_NoNAs <- rice_data %>%
pivot_wider(names_from = Rep, names_sep = "_",
values_from = c("Mock_30C-22C", "Mock_30C-28C",
"Pathogen_30C-22C", "Pathogen_30C-28C")) %>%
column_to_rownames("Genotype")